# 1. Load data ----
# Ten paired observations; x is the explanatory variable, y is the response.
x <- c(18, 7, 14, 31, 21, 5, 11, 16, 26, 29)
y <- c(55, 17, 36, 85, 62, 18, 33, 41, 63, 87)
ourdata <- data.frame(x = x, y = y)
ourdata

# 2. Scatter plot ----
plot(x, y)

# Question: does a linear fit seem justified, and which coefficient should be
# computed with R to back that up?
# Answer: the points of the scatter plot roughly follow a straight line, so a
# linear relationship between x and y is plausible. The coefficient that
# measures the strength of that linear trend is the correlation coefficient
# (computed further down with cor(x, y)).

# 3. Simple linear model: y_hat = b0_hat + b1_hat * x ----
# Explanatory (independent) variable: x
# Explained (dependent) variable:     y
model <- lm(formula = y ~ x, data = ourdata)
summary(model)

# Least-squares estimates returned by lm():
#   b1 = sum((x_i - x_bar) * (y_i - y_bar)) / sum((x_i - x_bar)^2)
#   b0 = y_bar - b1 * x_bar
print(coef(model))

# Fitted values y_hat_i = b0 + b1 * x_i, one per observation.
print(fitted(model))

# 4. Residuals ----
# e_i = y_i - y_hat_i. Stored under a name that does not mask the
# stats::residuals() generic.
model_residuals <- resid(model)
model_residuals

# With an intercept in the model the residuals sum to zero, so the mean is 0
# up to floating-point rounding (expect something like 1e-16, not exactly 0).
mean(model_residuals)

# Normality check of the residuals with the Shapiro-Wilk test.
#   H0: the errors are normally distributed
#   H1: the errors are not normally distributed
shapiro.test(model_residuals)
# Interpretation: if W is close to 1 and the p-value is > 0.05 we fail to
# reject H0, i.e. the data give no evidence against normality of the
# residuals. If the p-value is <= 0.05 we reject H0.


# 5. Draw the regression line ----
# Redraw the scatter plot with labelled axes, then overlay the fitted line
# (abline() reads the intercept and slope from the model's coefficients).
plot(
  x, y,
  xlab = "Independent Variable(x)",
  ylab = "Dependent Variable(y)"
)
abline(model, col = "red")

# 6. Make predictions ----
# Predicted y for a new observation with x = 21.
new_obs <- data.frame(x = 21)
predict(model, newdata = new_obs)

# 7. Sums of squares: SST = SSR + SSE ----
y_bar <- mean(y)
y_hat <- fitted(model)

# Total sum of squares: variation of y around its mean.
sst <- sum((y - y_bar)^2)
sst

# Regression sum of squares: variation explained by the fitted line.
ssr <- sum((y_hat - y_bar)^2)
ssr

# Error (residual) sum of squares: variation left unexplained.
sse <- sum((y - y_hat)^2)
sse

# 8. Coefficient of determination: R^2 = SSR / SST ----
# The same value is reported as "Multiple R-squared" by summary(model).
R_squared <- ssr / sst
R_squared
# Interpretation: R^2 is the share of the variability of y (dependent
# variable) that is explained by the linear regression on x (independent
# variable); multiply by 100 to state it as a percentage.

# 9. Error variance estimator (MSE): s^2 = SSE / (n - 2) ----
# n - 2 degrees of freedom because two parameters (b0, b1) are estimated.
n <- length(y)
n
s_squared <- sse / (n - 2)
s_squared

# 10. Estimated variances of b0 and b1 ----
x_bar <- mean(x)
sxx <- sum((x - x_bar)^2)  # corrected sum of squares of x

# Var(b0) = s^2 * sum(x_i^2) / (n * Sxx)
v_B0 <- s_squared * (sum(x^2) / (n * sxx))
v_B0

# Var(b1) = s^2 / Sxx
v_B1 <- s_squared / sxx
v_B1

# 11. Confidence intervals: b_j +/- t_critical * se(b_j) ----
confint(model, level = 0.95)
# Interpretation: if the sampling were repeated many times and an interval
# built each time, about 95% of those intervals would contain the true
# parameter. If the interval of a coefficient contains zero, the significance
# of that coefficient is questionable at the 5% level.
# Read the conclusion for b0 and b1 from the intervals printed above.

# 12. Hypothesis tests for b0 and b1 ----
#   H0: beta_j = 0   vs   H1: beta_j != 0   (two-sided, alpha = 0.05)

# Two-sided critical value with n - 2 degrees of freedom. The upper 0.975
# quantile is used so that t_critical is positive and can be compared
# directly with |t|.
t_critical <- qt(p = 0.975, df = n - 2)
t_critical

# Keep the summary under its own name so the summary() function is not masked.
model_summary <- summary(model)
# Coefficient matrix: one row per term ("(Intercept)", "x"), with columns
# "Estimate", "Std. Error", "t value" and "Pr(>|t|)".
coef_table <- model_summary$coefficients

# t statistic for b0 (same value as in the printed summary)
b0_t_value <- coef_table["(Intercept)", "t value"]
b0_t_value
# Interpretation:
#   |t| > t_critical --> reject H0 --> the intercept is statistically
#                        significant
#   |t| < t_critical --> fail to reject H0 --> the intercept is not
#                        statistically significant

# t statistic for b1 (same value as in the printed summary)
b1_t_value <- coef_table["x", "t value"]
b1_t_value
# Interpretation:
#   |t| > t_critical --> reject H0 --> the slope differs significantly from 0
#                        --> a linear relationship exists
#   |t| < t_critical --> fail to reject H0 --> not enough evidence to claim a
#                        linear relationship

# 13. ANOVA ----
#   H0: beta1 = 0   vs   H1: beta1 != 0
anova(model)

# Critical F value at the 5% level with (1, n - 2) degrees of freedom.
f_critical <- qf(p = 0.95, df1 = 1, df2 = n - 2)
f_critical
# Interpretation: if F > F-critical (equivalently p-value < 0.05), reject H0:
# beta1 is different from zero, so the regression is significant.

# 14. Covariance and correlation coefficient ----
# Formulas:
#   cov(x, y) = Sxy / (n - 1)
#   r         = cov(x, y) / (sx * sy)

# Named cov_xy so that the stats::cov() function is not masked.
cov_xy <- cov(x, y)
cov_xy

r <- cor(x, y)
r
# Interpretation: r is close to +1 (about 0.98 for the data defined at the top
# of this script), which indicates a strong positive linear relationship
# between x and y.

# 15. Recovering the slope from r ----
# Formula: b1 = r * (sy / sx)

# Sample standard deviations of x and y
sx <- sd(x)
sy <- sd(y)

# Slope estimate; it should match the coefficient of x reported by lm().
b1 <- r * (sy / sx)
b1

# 16. Hypothesis test on the correlation ----
#   H0: rho(x, y) = 0
#   H1: rho(x, y) != 0

# t-test on the correlation (statistic, df, p-value and a confidence
# interval for rho are printed).
test <- cor.test(x, y)
test

# Two-sided critical value at alpha = 0.05 with n - 2 degrees of freedom.
# The upper 0.975 quantile is used so that t_critical is positive and can be
# compared directly with |t|.
t_critical <- qt(p = 0.975, df = n - 2)
t_critical

# Interpretation: read t from the output above (about 14.2 for the data
# defined at the top of this script). Since |t| > t_critical, reject H0:
# rho(x, y) != 0, i.e. there is a linear relationship between x and y.



  
 